Continuous/linear regression
## A1 expr
## use broom::glance to get rsquared and pval
d.DeKegel_top5_pairs_drug_sens_A1_expr_lm_summary <- d.DeKegel_top5_pairs_drug_sens_expr %>%
filter(!is.na(dependency)) %>%
filter(!is.na(A1_log_tpm)) %>%
group_by(target, compound_id) %>%
group_modify(~ broom::glance(lm(dependency ~ A1_log_tpm, data = .x))) %>%
dplyr::ungroup() %>%
rename(r_squared = r.squared, p_val = p.value)
## FDR-adjust p-vals for multiple testing and add back to summary DF
d.stats <- BH_adjust(d.DeKegel_top5_pairs_drug_sens_A1_expr_lm_summary)
d.DeKegel_top5_pairs_drug_sens_A1_expr_lm_summary <- d.stats %>%
dplyr::select(-p_val) %>%
right_join(d.DeKegel_top5_pairs_drug_sens_A1_expr_lm_summary, by = c("target", "compound_id")) %>%
dplyr::select(target, compound_id, r_squared, p_val, fdr)
## use broom::tidy to get slope
## note: group_modify expects a DF output
d.DeKegel_top5_pairs_drug_sens_A1_expr_lm_slope <- d.DeKegel_top5_pairs_drug_sens_expr %>%
filter(!is.na(dependency)) %>%
filter(!is.na(A1_log_tpm)) %>%
group_by(target, compound_id) %>%
group_modify(~ broom::tidy(lm(dependency ~ A1_log_tpm, data = .x))) %>%
dplyr::ungroup() %>%
## extract slope of lm fit line from output
filter(term == "A1_log_tpm") %>%
rename(slope = estimate) %>%
dplyr::select(target, compound_id, slope)
## add rsquared, p-val, fdr, and slope info to main DF
d.DeKegel_top5_pairs_drug_sens_A1_expr_lm <- d.DeKegel_top5_pairs_drug_sens_expr %>%
left_join(d.DeKegel_top5_pairs_drug_sens_A1_expr_lm_summary, by = c("target", "compound_id")) %>%
left_join(d.DeKegel_top5_pairs_drug_sens_A1_expr_lm_slope, by = c("target", "compound_id")) %>%
filter(!is.na(dependency)) %>%
filter(!is.na(A1_log_tpm))
summarize_stats(d.DeKegel_top5_pairs_drug_sens_A1_expr_lm)
## df: d.DeKegel_top5_pairs_drug_sens_A1_expr_lm
## # target/compound pairs with
## p < 0.05: 481
## p < 0.01: 249
## FDR < 0.1: 251
## FDR < 0.05: 142
## A2 expr
## use broom::glance to get rsquared and pval
d.DeKegel_top5_pairs_drug_sens_A2_expr_lm_summary <- d.DeKegel_top5_pairs_drug_sens_expr %>%
filter(!is.na(dependency)) %>%
filter(!is.na(A2_log_tpm)) %>%
group_by(target, compound_id) %>%
group_modify(~ broom::glance(lm(dependency ~ A2_log_tpm, data = .x))) %>%
dplyr::ungroup() %>%
rename(r_squared = r.squared, p_val = p.value)
## FDR-adjust p-vals for multiple testing and add back to summary DF
d.stats <- BH_adjust(d.DeKegel_top5_pairs_drug_sens_A2_expr_lm_summary)
d.DeKegel_top5_pairs_drug_sens_A2_expr_lm_summary <- d.stats %>%
dplyr::select(-p_val) %>%
right_join(d.DeKegel_top5_pairs_drug_sens_A2_expr_lm_summary, by = c("target", "compound_id")) %>%
dplyr::select(target, compound_id, r_squared, p_val, fdr)
## use broom::tidy to get slope
## note: group_modify expects a DF output
d.DeKegel_top5_pairs_drug_sens_A2_expr_lm_slope <- d.DeKegel_top5_pairs_drug_sens_expr %>%
filter(!is.na(dependency)) %>%
filter(!is.na(A2_log_tpm)) %>%
group_by(target, compound_id) %>%
group_modify(~ broom::tidy(lm(dependency ~ A2_log_tpm, data = .x))) %>%
dplyr::ungroup() %>%
## extract slope of lm fit line from output
filter(term == "A2_log_tpm") %>%
rename(slope = estimate) %>%
dplyr::select(target, compound_id, slope)
## add rsquared, p-val, fdr, and slope info to main DF
d.DeKegel_top5_pairs_drug_sens_A2_expr_lm <- d.DeKegel_top5_pairs_drug_sens_expr %>%
left_join(d.DeKegel_top5_pairs_drug_sens_A2_expr_lm_summary, by = c("target", "compound_id")) %>%
left_join(d.DeKegel_top5_pairs_drug_sens_A2_expr_lm_slope, by = c("target", "compound_id")) %>%
filter(!is.na(dependency)) %>%
filter(!is.na(A2_log_tpm))
summarize_stats(d.DeKegel_top5_pairs_drug_sens_A2_expr_lm)
## df: d.DeKegel_top5_pairs_drug_sens_A2_expr_lm
## # target/compound pairs with
## p < 0.05: 340
## p < 0.01: 156
## FDR < 0.1: 128
## FDR < 0.05: 74
d.DeKegel_top5_pairs_drug_sens_A1_expr_lm %>%
distinct(target, compound_id, .keep_all = TRUE) %>%
mutate(slope_flag = ifelse(slope < 0, "negative", "positive")) %>%
group_by(slope_flag) %>%
summarize(n = n()) %>%
print_kbl()
|
slope_flag
|
n
|
|
negative
|
1343
|
|
positive
|
1039
|
d.DeKegel_top5_pairs_drug_sens_A2_expr_lm %>%
distinct(target, compound_id, .keep_all = TRUE) %>%
mutate(slope_flag = ifelse(slope < 0, "negative", "positive")) %>%
group_by(slope_flag) %>%
summarize(n = n()) %>%
print_kbl()
|
slope_flag
|
n
|
|
negative
|
1222
|
|
positive
|
1160
|
## get low expression = low LFC pairs (positive slope)
d.DeKegel_top5_pairs_drug_sens_A1_expr_lm_pos_slope <-d.DeKegel_top5_pairs_drug_sens_A1_expr_lm %>%
filter(slope > 0)
summarize_stats(d.DeKegel_top5_pairs_drug_sens_A1_expr_lm_pos_slope)
## df: d.DeKegel_top5_pairs_drug_sens_A1_expr_lm_pos_slope
## # target/compound pairs with
## p < 0.05: 120
## p < 0.01: 48
## FDR < 0.1: 48
## FDR < 0.05: 29
## get high expression = low LFC pairs (negative slope)
d.DeKegel_top5_pairs_drug_sens_A1_expr_lm_neg_slope <- d.DeKegel_top5_pairs_drug_sens_A1_expr_lm %>%
filter(slope < 0)
summarize_stats(d.DeKegel_top5_pairs_drug_sens_A1_expr_lm_neg_slope)
## df: d.DeKegel_top5_pairs_drug_sens_A1_expr_lm_neg_slope
## # target/compound pairs with
## p < 0.05: 361
## p < 0.01: 201
## FDR < 0.1: 203
## FDR < 0.05: 113
## get low expression = low LFC pairs (positive slope)
d.DeKegel_top5_pairs_drug_sens_A2_expr_lm_pos_slope <-d.DeKegel_top5_pairs_drug_sens_A2_expr_lm %>%
filter(slope > 0)
summarize_stats(d.DeKegel_top5_pairs_drug_sens_A2_expr_lm_pos_slope)
## df: d.DeKegel_top5_pairs_drug_sens_A2_expr_lm_pos_slope
## # target/compound pairs with
## p < 0.05: 158
## p < 0.01: 82
## FDR < 0.1: 67
## FDR < 0.05: 39
## get high expression = low LFC pairs (negative slope)
d.DeKegel_top5_pairs_drug_sens_A2_expr_lm_neg_slope <- d.DeKegel_top5_pairs_drug_sens_A2_expr_lm %>%
filter(slope < 0)
summarize_stats(d.DeKegel_top5_pairs_drug_sens_A2_expr_lm_neg_slope)
## df: d.DeKegel_top5_pairs_drug_sens_A2_expr_lm_neg_slope
## # target/compound pairs with
## p < 0.05: 182
## p < 0.01: 74
## FDR < 0.1: 61
## FDR < 0.05: 35
Plot A1 top hits
A1_top_hits <- d.DeKegel_top5_pairs_drug_sens_A1_expr_lm_pos_slope %>%
dplyr::select(target, compound_name, r_squared, fdr) %>%
distinct(target, compound_name, .keep_all = TRUE) %>%
arrange(fdr)
print_kbl(head(A1_top_hits, n = 20))
|
target
|
compound_name
|
r_squared
|
fdr
|
|
FYN_SRC
|
vandetanib
|
0.0659658
|
0.0000014
|
|
FYN_SRC
|
saracatinib
|
0.0572548
|
0.0000035
|
|
ATP1A1_ATP1A2
|
k-strophanthidin
|
0.0587694
|
0.0000035
|
|
ATP1A1_ATP1A3
|
k-strophanthidin
|
0.0587694
|
0.0000035
|
|
ATP1A1_ATP1A4
|
k-strophanthidin
|
0.0587694
|
0.0000035
|
|
FYN_YES1
|
saracatinib
|
0.0572548
|
0.0000035
|
|
FYN_SRC
|
PD-168393
|
0.0534604
|
0.0000095
|
|
ARAF_RAF1
|
AZ-628
|
0.0376702
|
0.0004196
|
|
MAPK11_MAPK14
|
tyrphostin-AG-1478
|
0.0336892
|
0.0008928
|
|
FYN_SRC
|
bosutinib
|
0.0326309
|
0.0011196
|
|
FYN_YES1
|
bosutinib
|
0.0326309
|
0.0011196
|
|
MAP2K1_MAP2K2
|
arctigenin
|
0.0301968
|
0.0024172
|
|
CDK2_CDK3
|
NU6027
|
0.0258299
|
0.0066929
|
|
CDK2_CDK5
|
NU6027
|
0.0258299
|
0.0066929
|
|
EPAS1_HIF1A
|
BAY-87-2243
|
0.0245436
|
0.0097530
|
|
EPAS1_HIF1A
|
2-methoxyestradiol
|
0.0239446
|
0.0107025
|
|
NR1I2_VDR
|
erlotinib
|
0.0219698
|
0.0137919
|
|
CAMK2D_CAMK2G
|
bosutinib
|
0.0215397
|
0.0151139
|
|
ESR1_ESR2
|
ethynodiol-diacetate
|
0.0209548
|
0.0180253
|
|
GSK3A_GSK3B
|
SB-203580
|
0.0209268
|
0.0181552
|
A1_top_hits_plot_list <- A1_top_hits %>%
arrange(fdr) %>%
unite("target_compound_label", c("target", "compound_name"), sep = "\n", remove = FALSE) %>%
head(n = 9) %>%
pull(target_compound_label)
make_lm_plot(d.DeKegel_top5_pairs_drug_sens_A1_expr_lm_pos_slope, A1_top_hits_plot_list, A1_log_tpm)

Plot A2 top hits
A2_top_hits <- d.DeKegel_top5_pairs_drug_sens_A2_expr_lm_pos_slope %>%
dplyr::select(target, compound_name, r_squared, fdr) %>%
distinct(target, compound_name, .keep_all = TRUE) %>%
arrange(fdr)
print_kbl(head(A2_top_hits, n = 20))
|
target
|
compound_name
|
r_squared
|
fdr
|
|
AKT1_AKT3
|
GSK2110183
|
0.0982682
|
0.0000000
|
|
AKT2_AKT3
|
GSK2110183
|
0.0982682
|
0.0000000
|
|
AKT1_AKT3
|
MK-2206
|
0.0873418
|
0.0000000
|
|
AKT2_AKT3
|
MK-2206
|
0.0873418
|
0.0000000
|
|
AKT1_AKT3
|
GDC-0068
|
0.0607472
|
0.0000013
|
|
AKT2_AKT3
|
GDC-0068
|
0.0607472
|
0.0000013
|
|
MAP2K1_MAP2K2
|
bosutinib
|
0.0474871
|
0.0000464
|
|
AKT1_AKT3
|
AZD5363
|
0.0474116
|
0.0000464
|
|
AKT2_AKT3
|
AZD5363
|
0.0474116
|
0.0000464
|
|
ABL1_ABL2
|
saracatinib
|
0.0444061
|
0.0000956
|
|
PIK3R1_PIK3R2
|
CUDC-907
|
0.0410053
|
0.0003328
|
|
CDK2_CDK5
|
bosutinib
|
0.0371005
|
0.0006195
|
|
PTPN11_PTPN6
|
BVT-948
|
0.0264080
|
0.0107209
|
|
CDK1_CDK2
|
NU6027
|
0.0258299
|
0.0111549
|
|
HDAC4_HDAC5
|
romidepsin
|
0.0246434
|
0.0157450
|
|
EPAS1_HIF1A
|
BAY-87-2243
|
0.0243860
|
0.0160324
|
|
ROCK1_ROCK2
|
SB-203580
|
0.0236855
|
0.0187571
|
|
MAP3K2_MAP3K3
|
bosutinib
|
0.0233228
|
0.0195743
|
|
AKT1_AKT2
|
A-674563
|
0.0221977
|
0.0259942
|
|
PPP3CA_PPP3CB
|
cyclosporin-a
|
0.0240152
|
0.0260890
|
A2_top_hits_plot_list <- A2_top_hits %>%
arrange(fdr) %>%
unite("target_compound_label", c("target", "compound_name"), sep = "\n", remove = FALSE) %>%
head(n = 9) %>%
pull(target_compound_label)
make_lm_plot(d.DeKegel_top5_pairs_drug_sens_A2_expr_lm_pos_slope, A2_top_hits_plot_list, A2_log_tpm)

Outliers for top lm hits
## A2 top hits
target_list <- c("AKT1_AKT2", "AKT2_AKT3", "AKT1_AKT3", "PTPN11_PTPN6", "CDK2_CDK5", "CDK1_CDK2", "MAP2K1_MAP2K2")
compound_list <- c("GSK2110183", "MK−2206", "GDC−0068", "AZD5363", "BVT_948", "bosutinib", "NU6027", "canertinib")
## robustly scale expression data
d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers_unfiltered <- d.DeKegel_top5_pairs_drug_sens_expr %>%
filter(target %in% target_list) %>%
filter(grepl(str_c(compound_list, collapse = "|"), compound_name)) %>%
filter(!is.na(A2_log_tpm) | !is.na(dependency)) %>%
group_by(target, compound_id) %>%
mutate(scaled_A2_log_tpm = RobScale(A2_log_tpm, center = TRUE, scale = TRUE)) %>%
mutate(A2_expr_outlier_flag = case_when(
scaled_A2_log_tpm < -1.5 ~ "low",
scaled_A2_log_tpm > 1.5 ~ "high",
TRUE ~ "neither")) %>%
mutate(scaled_A1_log_tpm = RobScale(A1_log_tpm, center = TRUE, scale = TRUE)) %>%
mutate(A1_expr_outlier_flag = case_when(
scaled_A1_log_tpm < -1.5 ~ "low",
scaled_A1_log_tpm > 1.5 ~ "high",
TRUE ~ "neither")) %>%
ungroup() %>%
mutate(A2_expr_outlier_flag = factor(A2_expr_outlier_flag, levels = c("low", "neither", "high")))%>%
mutate(A1_expr_outlier_flag = factor(A1_expr_outlier_flag, levels = c("low", "neither", "high")))
d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers <- d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers_unfiltered %>%
group_by(target, compound_id) %>%
## get rid of drug/targets that don't have any "low" cell lines
filter(any(A2_expr_outlier_flag == "low")) %>%
ungroup()
d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers_grp_med_diff <- d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers %>%
group_by(target, compound_id, A2_expr_outlier_flag) %>%
## calculate median dependency score for each outlier group
summarize(grp_med_dependency = median(dependency)) %>%
pivot_wider(names_from = A2_expr_outlier_flag, values_from = grp_med_dependency) %>%
## calculate difference between low and neither groups
mutate(grp_med_diff = low - neither) %>%
dplyr::select(target, compound_id, grp_med_diff)
d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers <- d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers %>%
left_join(d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers_grp_med_diff, by = c("target", "compound_id"))
## low outliers and drug sensitivity
d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers_wrst_pvals <- d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers %>%
filter(A2_expr_outlier_flag != "high") %>%
group_by(target, compound_id) %>%
summarize(p_val = wilcox.test(dependency ~ A2_expr_outlier_flag)$p.value)
## adjust for multiple testing overall
d.stats <- BH_adjust(d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers_wrst_pvals)
## Adding missing grouping variables: `target`
d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers_wrst <- d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers %>%
left_join(d.stats, by = c("target", "compound_id"))
## filter for hits where low-expressing cell lines are more sensitive to the drug
## (low group has lower median than neither group)
d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers_wrst_sens <- d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers_wrst %>%
filter(grp_med_diff < 0)
## filter for hits where low-expressing ell lines are more resistant to the drug
## (low group has higher median than neither group)
d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers_wrst_resist <- d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers_wrst %>%
filter(grp_med_diff > 0)
summarize_stats(d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers_wrst_sens)
## df: d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers_wrst_sens
## # target/compound pairs with
## p < 0.05: 5
## p < 0.01: 5
## FDR < 0.1: 5
## FDR < 0.05: 5
summarize_stats(d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers_wrst_resist)
## df: d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers_wrst_resist
## # target/compound pairs with
## p < 0.05: 0
## p < 0.01: 0
## FDR < 0.1: 0
## FDR < 0.05: 0
Plot
plot_list <- d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers_wrst_sens %>%
unite("target_compound_id", c("target", "compound_name"), sep = "\n", remove = FALSE) %>%
distinct(target_compound_id) %>%
pull(target_compound_id)
make_outlier_plot(d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers_wrst_sens,
plot_list[1:4], "low", A2_expr_outlier_flag, A2_log_tpm)

make_outlier_plot(d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers_wrst_sens,
plot_list[5:8], "low", A2_expr_outlier_flag, A2_log_tpm)

make_lineage_plot(d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers_wrst_sens,
plot_list, A2_log_tpm, A2_expr_outlier_flag)

write_rds(d.DeKegel_top5_pairs_drug_sens_expr_A2_lm_pos_top_outliers_wrst_sens,
file.path(out_dir, "d.top_outlier_pairs"))